On this page, we will be exploring the overall trends for our key outcome- sleeping hours per day, and how these outcomes correlate with the predictors, including demographic variables like age, race, sex, education level, and poverty status.
library(tidyverse)
library(patchwork)
library(knitr)
library(dplyr)
library(gganimate)
library(gifski)
library(png)
library(plotly)
library(ggridges)
Short sleep duration is based on age group recommended hours of sleep per day and defined as less than 7 hours for adults. This plot shows age-adjusted prevalence of adults who reported short sleep duration from 2014 to 2020. Overall, short sleep duration prevalence was higher among males than females across years.
year_df=read_csv("data/AdultTrends.csv") %>%
janitor::clean_names() %>%
pivot_longer(cols=c("female", "male"),
names_to = "sex",
values_to = "age_adjust_prev") %>%
ggplot(aes(x=year,y=age_adjust_prev,col=sex))+geom_line()+geom_point()+labs(
title = "National trends in short sleep duration",
x = "Year",
y = "Age Adjusted Prevalence %"
)
year_df+transition_reveal(year)
We first want to get the distribution of sleeping hours less than 7 hours across the five different education levels. We will construct a bar chart tabulating the average sleeping hours per week in each of the five education levels. Gender consideration was also added into to the bar chart in order to see a difference between female and male in each category.
edu_plot=slp_df %>%
filter(weekday_slp_hr<7)%>%
group_by(education_level,gender) %>%
summarize(ave_sleep=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7)) %>%
ungroup() %>%
mutate(education_level=fct_reorder(education_level,ave_sleep)) %>%
ggplot(aes(x=education_level,y=ave_sleep,fill=gender))+ geom_bar(width=0.5,stat="identity")+
viridis::scale_fill_viridis(
name = "gender",
discrete = TRUE
) + geom_text(aes(label = round(ave_sleep, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)+
theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+labs(
title = "Distribution of sleeping hours across education level",
x = "Education Level",
y = "Average Sleeping hours per day"
)
edu_plot
From the above plot, high school graduates has the least sleeping hours, while college graduates or above has the highest. Female and male does not have a significant difference in both groups.
A heat map was made to visualize sleeping hours less than 7 hours among different races. The below plot indicates that there is a large number of people who has a sleeping hour of 6 hours per day among the Non-Hispanic black group. There are also many Non-Hispanic Whites who has an average of 6 hours per day.
race_plot=slp_df %>%
filter(weekday_slp_hr<7) %>%
mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
group_by(race,sleep_ave) %>%
summarise(obs=n()) %>%
plot_ly(
x = ~sleep_ave, y = ~race, z = ~obs, type = "heatmap", colors = "BuPu"
) %>%
colorbar(title = "Number of People", x = 1, y = 0.5)
layout(race_plot, xaxis = list(title = "Average Sleeping Hours Per Day"), yaxis = list(title = "Race"))
The below plot demonstrates the gender gap in the patients for different races. Male outnumber female for sleeping less than 7 hours per day for all race, except Non-Hispanic Black and Non-Hispanic Asian. The bubble represents the degree of the gap, along inlcuding their education level.
gender_plot=slp_df %>%
filter(weekday_slp_hr<7) %>%
group_by(race,education_level) %>%
summarize(total_f=sum(gender=="female"),
total_m=sum(gender=="male"),
gap=total_m-total_f) %>%
mutate(text_lable=str_c("Race=",race,"\nEducation level: ", education_level)) %>%
plot_ly(x=~total_m,y=~total_f,text=~text_lable,color=~race,size=~gap,type="scatter",mode="markers",
colors="viridis",sizes = c(50, 700), marker = list(opacity = 0.7))
layout(gender_plot, title = "Race Gender Gap by Education Level", xaxis = list(title = "Number of Male Sleeping less than 7 hrs"), yaxis = list(title = "Number of Female Sleeping less than 7 hrs"))
income_df=slp_df %>%
filter(weekday_slp_hr<7) %>%
mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
income_poverty_ratio < 1~ "in poverty",
income_poverty_ratio == 1~ "in poverty")) %>%
ggplot(aes(x=weekday_slp_hr,y=ip_stat,fill=ip_stat))+
geom_density_ridges(
aes(point_color = ip_stat, point_shape = ip_stat,point_fill=ip_stat),
alpha = .3, point_alpha = 0.7)+
scale_x_continuous(
breaks = c(2, 4, 6),
labels = c("2hrs", "4hrs", "6hrs"),
limits = c(2, 6)
)
box_plot=
slp_df %>%
filter(weekday_slp_hr<6) %>%
mutate(ip_stat=case_when(income_poverty_ratio > 1 ~ "not in poverty",
income_poverty_ratio < 1~ "in poverty",
income_poverty_ratio == 1~ "in poverty")) %>%
mutate(sleep_ave=(weekday_slp_hr*5+weekend_slp_hr*2)/7) %>%
ggplot(aes(x=ip_stat,y=sleep_ave))+geom_boxplot(aes(fill = ip_stat), alpha = 0.3)+
geom_hline(aes(yintercept=median(sleep_ave),
color="red", linetype="dashed"))+
geom_text(aes(0, median(weekday_slp_hr), label = "sleep hours median"), vjust = -0.5, hjust = 0, color = "red")
comb=income_df+box_plot
comb+plot_annotation(
title = "Sleeping Hours By Poverty Status"
)
age_group= slp_df%>%
filter(weekday_slp_hr<7) %>%
mutate(age_gp=case_when(age>=20 & age<=30 ~ "20-30",
age>=31 &age <=40 ~ "31-40",
age>=41 &age<=50 ~ "41-50",
age>=51 &age<=60 ~ "51-60",
age>=61 &age<=70 ~ "61-70",
age>=71 & age <=80 ~ "71-80")) %>%
group_by(age_gp) %>%
summarise(ave_slp=mean((weekday_slp_hr*5+weekend_slp_hr*2)/7))%>%
ungroup() %>%
mutate(age_gp=fct_reorder(age_gp,ave_slp)) %>%
ggplot(aes(x=age_gp,y=ave_slp,fill=age_gp))+ geom_bar(stat="identity")+ scale_fill_viridis_d()+
theme(axis.text.x = element_text(angle = -90, vjust = 0.5, hjust=1))+
geom_text(aes(label = round(ave_slp, 2)),position = position_stack(vjust=0.9), color = "white", size = 4)
age_group